Used packages

1.Leaflet

2.ggmap

Libraries in use

library(rpart)
library(rpart.plot)
library(ggplot2)
library(ggmap)
library(leaflet)

Load CSV file

# Read the listings data frame from the serialized R file.
# (Alternative CSV route, kept for reference:
#   CleanedListings <- read.csv("CleanedListings.csv"))
CleanedListings <- readRDS(file = "data.Rda")

# Print a compact overview of every column and its type.
str(object = CleanedListings)
## 'data.frame':    5206 obs. of  157 variables:
##  $ id                                        : int  13824783 16740225 18125245 8362570 789867 16701336 8148031 6713059 17620535 12581064 ...
##  $ name                                      : Factor w/ 5169 levels "","(01A) Apartment Steps From Chicago's Grant Park",..: 2083 2293 1692 2759 1624 3065 3930 3871 4395 1928 ...
##  $ host_id                                   : int  55020055 36722941 51669215 32837114 2782694 36722941 43018961 2441740 756599 23503681 ...
##  $ host_name                                 : Factor w/ 1790 levels "5 Star Resorts Plus",..: 1441 418 1679 683 284 418 1392 928 1356 589 ...
##  $ host_since                                : Date, format: "2016-01-17" "2015-06-25" ...
##  $ host_response_time                        : Factor w/ 5 levels "","a few days or more",..: 5 5 5 4 5 5 5 1 5 5 ...
##  $ host_response_rate                        : num  100 100 100 100 100 ...
##  $ host_is_superhost                         : int  1 0 0 1 0 0 0 0 0 1 ...
##  $ host_listings_count                       : int  1 3 1 1 1 3 1 1 1 2 ...
##  $ host_total_listings_count                 : int  1 3 1 1 1 3 1 1 1 2 ...
##  $ host_verifications                        : Factor w/ 229 levels "[]","['email', 'facebook', 'reviews', 'jumio']",..: 91 201 188 91 51 201 122 199 93 138 ...
##  $ host_identity_verified                    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ street                                    : Factor w/ 268 levels "Albany Park, Chicago, IL 60618, United States",..: 154 65 154 154 155 154 155 65 65 154 ...
##  $ neighbourhood_cleansed                    : Factor w/ 72 levels "Albany Park",..: 36 36 36 36 36 36 36 36 36 36 ...
##  $ city                                      : Factor w/ 14 levels "Calumet Park",..: 4 4 4 4 4 4 4 4 4 4 ...
##  $ state                                     : Factor w/ 3 levels "il","Il","IL": 3 3 3 3 3 3 3 3 3 3 ...
##  $ zipcode                                   : Factor w/ 64 levels "","60202","60302",..: 30 30 30 30 43 30 43 30 30 30 ...
##  $ market                                    : Factor w/ 4 levels "","Chicago","Chico",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ smart_location                            : Factor w/ 17 levels "Calumet Park, IL",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ country                                   : Factor w/ 1 level "United States": 1 1 1 1 1 1 1 1 1 1 ...
##  $ latitude                                  : num  42 42 42 42 42 ...
##  $ longitude                                 : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ is_location_exact                         : int  1 0 1 1 1 1 1 0 0 1 ...
##  $ property_type                             : Factor w/ 4 levels "Apartment","Condominium",..: 3 2 4 2 1 2 1 1 1 1 ...
##  $ room_type                                 : Factor w/ 3 levels "Entire home/apt",..: 2 2 2 2 2 2 2 2 1 1 ...
##  $ accommodates                              : int  4 2 6 2 3 2 2 1 4 4 ...
##  $ bathrooms                                 : num  1 1 1.5 1 1 1 1 1 1 1 ...
##  $ bedrooms                                  : num  1 1 2 1 1 1 1 1 2 2 ...
##  $ beds                                      : num  2 1 3 1 1 1 1 1 2 2 ...
##  $ bed_type                                  : Factor w/ 5 levels "Airbed","Couch",..: 5 5 5 5 5 5 3 5 5 5 ...
##  $ price                                     : num  56 36 80 80 20 28 45 48 149 331 ...
##  $ weekly_price                              : num  56 36 80 80 20 28 45 48 149 331 ...
##  $ monthly_price                             : Factor w/ 288 levels "$1,000.00","$10,000.00",..: 35 35 35 35 35 35 35 35 35 35 ...
##  $ security_deposit                          : num  0 0 150 0 0 0 0 0 0 0 ...
##  $ cleaning_fee                              : num  30 35 65 10 0 35 6 25 75 20 ...
##  $ guests_included                           : int  2 1 4 2 1 1 1 1 4 1 ...
##  $ extra_people                              : num  15 20 15 10 10 20 0 0 15 0 ...
##  $ minimum_nights                            : int  2 1 1 2 3 1 1 1 2 1 ...
##  $ maximum_nights                            : int  1125 7 90 4 20 7 21 5 1125 3 ...
##  $ availability_30                           : int  1 0 11 4 13 0 5 17 14 19 ...
##  $ availability_60                           : int  8 0 38 22 23 0 27 47 34 42 ...
##  $ availability_90                           : int  14 0 68 52 38 0 36 77 64 71 ...
##  $ availability_365                          : int  32 0 158 52 115 0 36 352 339 71 ...
##  $ number_of_reviews                         : int  25 0 4 12 96 1 5 14 0 0 ...
##  $ first_review                              : Date, format: "2016-07-23" NA ...
##  $ last_review                               : Date, format: "2017-05-07" NA ...
##  $ review_scores_rating                      : num  99 95 95 98 94 ...
##  $ instant_bookable                          : int  1 1 0 0 1 1 0 0 0 1 ...
##  $ cancellation_policy                       : Factor w/ 5 levels "flexible","moderate",..: 2 3 2 1 3 3 1 1 3 2 ...
##  $ calculated_host_listings_count            : int  1 3 1 1 1 3 1 1 1 2 ...
##  $ reviews_per_month                         : num  2.57 0 4 0.87 1.96 0.28 0.92 0.63 0 0 ...
##  $ Description_ClusterGroup                  : Factor w/ 3 levels "0","1","2": 1 1 1 2 2 1 1 1 2 2 ...
##  $ X24.hour.check.in                         : int  1 1 0 0 1 1 0 0 0 1 ...
##  $ Accessible.height.bed                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Accessible.height.toilet                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Air.conditioning                          : int  1 1 1 1 1 1 0 0 1 1 ...
##  $ BBQ.grill                                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Baby.bath                                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Baby.monitor                              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Babysitter.recommendations                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bathtub                                   : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ Beach.essentials                          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Bed.linens                                : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Breakfast                                 : int  1 1 1 1 0 1 1 1 0 0 ...
##  $ Buzzer.wireless.intercom                  : int  0 0 1 1 1 0 1 0 0 0 ...
##  $ Cable.TV                                  : int  1 0 1 1 1 0 1 0 0 0 ...
##  $ Carbon.monoxide.detector                  : int  0 1 1 1 1 1 0 0 0 0 ...
##  $ Cat.s.                                    : int  0 0 0 0 1 0 1 0 0 0 ...
##  $ Changing.table                            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Children.s.books.and.toys                 : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Children.s.dinnerware                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Cleaning.before.checkout                  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Coffee.maker                              : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Cooking.basics                            : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Crib                                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Disabled.parking.spot                     : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Dishes.and.silverware                     : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Dishwasher                                : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Dog.s.                                    : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ Doorman                                   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Doorman.Entry                             : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Dryer                                     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ EV.charger                                : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Elevator.in.building                      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Essentials                                : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Ethernet.connection                       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Extra.pillows.and.blankets                : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ Family.kid.friendly                       : int  1 1 1 0 1 1 0 0 1 0 ...
##  $ Fire.extinguisher                         : int  1 0 1 1 1 1 0 0 0 0 ...
##  $ Fireplace.guards                          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Firm.matress                              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ First.aid.kit                             : int  0 0 1 1 1 1 1 0 0 0 ...
##  $ Flat.smooth.pathway.to.front.door         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Free.parking.on.premises                  : int  1 0 1 0 1 0 0 0 0 0 ...
##  $ Free.parking.on.street                    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Game.console                              : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Garden.or.backyard                        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Grab.rails.for.shower.and.toilet          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Gym                                       : int  0 0 0 1 0 0 0 0 0 0 ...
##   [list output truncated]

Load Chicago Map using qmap function.

# Fetch the base map for "Chicago" at zoom level 14; the ggmap plots in this
# report are layered on top of this object.
# NOTE(review): the ggmap plots in this report each warn that 4427 rows were
# removed, out of 5206 listings -- presumably points lying outside this map's
# extent; confirm whether a lower zoom level was intended.
ChicagoMap <- qmap(location = "Chicago", zoom = 14)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Chicago&zoom=14&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Chicago&sensor=false
## Warning: `panel.margin` is deprecated. Please use `panel.spacing` property
## instead
# Render the bare base map.
print(ChicagoMap)

GGPLOTS on Map

Basic Scatter Plot of listings

Every point on the map is a listing. We can clearly see that the density of listings is higher near the main city.

# Draw every listing as a point on the Chicago base map.
# Columns are referenced by bare name inside aes() so they are looked up in
# the layer's `data` rather than extracted with `$`, which ggplot2 discourages.
ChicagoMap +
  geom_point(
    aes(x = longitude, y = latitude),
    data = CleanedListings
  ) +
  ggtitle("Map Of Chicago")
## Warning: Removed 4427 rows containing missing values (geom_point).

Scatter Plot for different room types available

We can see from the graph that listings with room type "Entire home/apt" are more numerous than private rooms and shared rooms. One reason is that most of the listings are clustered near the main city area, where apartments will definitely outnumber independent houses (a part of which would be listed as a private room). Apartments are also preferred by guests for their privacy and the amenities available.

# Listings on the Chicago base map, coloured by room type.
# Bare column names inside aes() are resolved against `data`; this also gives
# the legend the title "room_type" instead of the literal `$` expression.
ChicagoMap +
  geom_point(
    aes(x = longitude, y = latitude, colour = room_type),
    data = CleanedListings
  ) +
  ggtitle("Room Types availability IN Chicago")
## Warning: Removed 4427 rows containing missing values (geom_point).

Refined map according to room type

We used a size dimension to make the above graph more interpretable. The room type with the fewest listings is drawn with the biggest circles on the map to make it more readable: the “Shared room” type appears as big blue circles.

# Listings on the Chicago base map with room type mapped to both colour and
# point size. All aesthetics use bare column names resolved against `data`
# (the original mixed `CleanedListings$...` and bare names in one aes()).
# ggplot2 warns that size is not advised for a discrete variable; the mapping
# is kept on purpose so the rarer room types stand out.
ChicagoMap +
  geom_point(
    aes(
      x = longitude,
      y = latitude,
      colour = room_type,
      size = room_type
    ),
    data = CleanedListings
  )
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 4427 rows containing missing values (geom_point).

Expensive prices

This plot shows the listings according to price. Each price is multiplied by a scaling factor (the cluster size) to set the point size on the map, so the costlier the listing, the bigger the circle.

# Make sure price is numeric: strip "$" and "," only when the column is still
# stored as text/factor, so an already-numeric column is left untouched.
if (!is.numeric(CleanedListings$price)) {
  CleanedListings$price <- as.numeric(
    gsub("[\\$,]", "", CleanedListings$price)
  )
}

# Scaling factor that converts a price into a point size.
CgClusterSize <- 0.005

# Semi-transparent red points whose size is price * CgClusterSize.
# - The size is computed inside aes() and passed through unchanged by
#   scale_size_identity(), so each point keeps the exact size price * factor.
# - The former `colour` aesthetic was dropped: the constant colour overrode it,
#   so it never had any effect.
# - The former scale_size_continuous(range = range(price)) was dropped: size
#   was not a mapped aesthetic, so that scale was never applied.
ChicagoMap +
  geom_point(
    aes(x = longitude, y = latitude, size = price * CgClusterSize),
    data = CleanedListings,
    colour = "REd",
    alpha = 0.5
  ) +
  scale_size_identity() +
  ggtitle("Expensiveness of Airbnb Properties in Chicago")
## Warning: Removed 4427 rows containing missing values (geom_point).

Density Distribution Map

Areas with a higher number of listings have more contour lines and are darker in colour. This is an enhanced version of the scatter plot for showing the density of listings. Since there are no points, it is easy to identify the highly dense areas from just the colour and the density of the contour lines in an area.

# Two layers over the base map: thin 2-D density contour lines, and filled
# density polygons (16 bins) whose fill colour and transparency both follow
# the computed density level. The alpha legend is suppressed.
ChicagoMap +
  geom_density2d(
    aes(x = longitude, y = latitude),
    data = CleanedListings,
    size = 0.3
  ) +
  stat_density2d(
    aes(x = longitude, y = latitude, fill = ..level.., alpha = ..level..),
    data = CleanedListings,
    geom = "polygon",
    bins = 16,
    size = 0.001
  ) +
  scale_fill_gradient(name = "Density", low = "purple", high = "black") +
  scale_alpha(guide = FALSE, range = c(0.05, 0.1)) +
  ggtitle("Density Distribution of Airbnb Properties in CHicago")
## Warning: Removed 4427 rows containing non-finite values (stat_density2d).

## Warning: Removed 4427 rows containing non-finite values (stat_density2d).

Leaflet Package Maps

Listings according to Reviews

This package allows zooming of maps. This map shows listings with a high number of reviews (more than 300). We use circle markers and if-else logic to mark areas with a higher number of reviews. The red hollow circles in the map below show such areas.

# Interactive map: watercolor tiles with the toner-hybrid layer on top.
# Listings with more than 300 reviews get a large red marker; all others get
# a tiny purple one. Formulas are evaluated against the map's data.
leaflet(data = CleanedListings) %>%
  addProviderTiles("Stamen.Watercolor") %>%
  addProviderTiles("Stamen.TonerHybrid") %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    radius = ~ifelse(number_of_reviews > 300, 20, 0.1),
    color = ~ifelse(number_of_reviews > 300, "red", "purple"),
    fillOpacity = 0.1
  )

Map to show listings with higher prices[Price >$3000]

Uses the leaflet library. We use circle markers and if-else logic to mark areas with the highest prices (in this case more than $3000). The red hollow circles in the map below show such areas.

# Interactive map: watercolor tiles with the toner-hybrid layer on top.
# Listings priced above 3000 get a large red marker; all others get a tiny
# green one. Formulas are evaluated against the map's data.
leaflet(data = CleanedListings) %>%
  addProviderTiles("Stamen.Watercolor") %>%
  addProviderTiles("Stamen.TonerHybrid") %>%
  addCircleMarkers(
    lng = ~longitude,
    lat = ~latitude,
    radius = ~ifelse(price > 3000, 20, 0.1),
    color = ~ifelse(price > 3000, "red", "green"),
    fillOpacity = 0.4
  )

Highlighting Density according to neighbourhood

Uses the leaflet function with “DarkMatter” as the basemap to highlight listings according to neighbourhood. The colour palette used in this map is “topo.colors(25)”, which makes it easier to interpret on a black background.

# Categorical colour function over the neighbourhood factor, built from the
# 25-colour topo palette.
factpal <- colorFactor(
  palette = topo.colors(25),
  domain = CleanedListings$neighbourhood_cleansed
)

# One HTML popup string per listing, naming its neighbourhood.
popup <- paste0(
  "<strong>'hood: </strong>",
  CleanedListings$neighbourhood_cleansed
)

# Small borderless markers on a dark basemap, coloured by neighbourhood, plus
# a legend. lng/lat are not passed, so leaflet infers them from the data's
# column names.
leaflet(CleanedListings) %>%
  addProviderTiles("CartoDB.DarkMatter") %>%
  addCircleMarkers(
    radius = 1.2,
    stroke = FALSE,
    fillOpacity = 0.5,
    color = ~factpal(neighbourhood_cleansed),
    popup = ~popup
  ) %>%
  addLegend(
    title = "Density Vs Neighbourhood",
    pal = factpal,
    values = ~neighbourhood_cleansed,
    opacity = 1
  )
## Assuming "longitude" and "latitude" are longitude and latitude, respectively

Super hosts in Chicago

This map shows superhosts and non-superhosts. There are more non-superhosts than superhosts, as superhost status depends on a lot of factors such as number of reviews, response time, cancellation policies, instant booking availability, etc. Only a host who fulfils all these criteria counts as a superhost, which may be why the number of superhosts in the listings is lower.

# Listings on the Chicago base map, coloured by superhost status.
# host_is_superhost is a 0/1 indicator, so it is converted to a factor and
# given a two-colour discrete scale; mapping it to a continuous gradient
# produced a colour bar with meaningless intermediate values. The two colours
# are the endpoints of the previous gradient (orange for 0, ggplot2's default
# gradient high colour for 1).
ChicagoMap +
  geom_point(
    aes(x = longitude, y = latitude, colour = factor(host_is_superhost)),
    data = CleanedListings,
    size = 4,
    alpha = 0.7
  ) +
  scale_colour_manual(
    name = "Superhost",
    values = c("0" = "orange", "1" = "#56B1F7"),
    breaks = c("0", "1"),
    labels = c("No", "Yes")
  ) +
  ggtitle("Super Hosts in Chicago")
## Warning: Removed 4427 rows containing missing values (geom_point).

Instant Bookings available

Map showing the areas where instant booking is available. Again, the listings with instant booking are fewer in number.

# Listings on the Chicago base map, coloured by instant-booking availability.
# instant_bookable is a 0/1 indicator, so it is converted to a factor and
# given a two-colour discrete scale; mapping it to a continuous gradient
# produced a colour bar with meaningless intermediate values. The two colours
# are the endpoints of the previous gradient (green for 0, ggplot2's default
# gradient high colour for 1).
ChicagoMap +
  geom_point(
    aes(x = longitude, y = latitude, colour = factor(instant_bookable)),
    data = CleanedListings,
    size = 4,
    alpha = 0.7
  ) +
  scale_colour_manual(
    name = "Instant bookable",
    values = c("0" = "Green", "1" = "#56B1F7"),
    breaks = c("0", "1"),
    labels = c("No", "Yes")
  ) +
  ggtitle("Instant Bookings")
## Warning: Removed 4427 rows containing missing values (geom_point).

Cancellation Policies

This map shows listings marked according to their cancellation policy. We notice an approximately equal number of listings for flexible and strict, with slightly fewer for the moderate type.

# Listings on the Chicago base map with cancellation policy mapped to both
# colour and point size. All aesthetics use bare column names resolved against
# `data` (the original mixed `CleanedListings$...` and bare names in one
# aes()). ggplot2 warns that size is not advised for a discrete variable; the
# mapping is kept as in the original design.
ChicagoMap +
  geom_point(
    aes(
      x = longitude,
      y = latitude,
      colour = cancellation_policy,
      size = cancellation_policy
    ),
    data = CleanedListings
  ) +
  ggtitle("Cancellation Policies")
## Warning: Using size for a discrete variable is not advised.
## Warning: Removed 4427 rows containing missing values (geom_point).